start_time <- Sys.time()
# Import functions
root = "./"
source(file.path(root,"MAIN.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
# load("Results/Current_Pipeline/scRNAseq_results.RData")
load(file.path(resultsPath, "scRNAseq_results.RData"))
# resultsPath <- "Results/subsetGenes-protein_coding__subsetCells-F__Resolution-0.2__perplexity-40__nCores-4"
print("Written using: Seurat version* 2.3.4 2018-07-17")## [1] "Written using: Seurat version* 2.3.4 2018-07-17"
# https://satijalab.org/seurat/install.html
library(Seurat) #
paste("Seurat", packageVersion("Seurat")) ## [1] "Seurat 2.3.4"
# library(monocle) # BiocManager::install("monocle")
# paste("monocle", packageVersion("monocle"))
library(monocle3)
paste("monocle3", packageVersion("monocle3")) ## [1] "monocle3 0.1.2"
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
paste("garnett", packageVersion("garnett"))## [1] "garnett 0.2.3"
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
library(GeneOverlap) # BiocManager::install("GeneOverlap")
# Exporting 3D plots
knitr::knit_hooks$set(webgl = rgl::hook_webgl)
if(getwd()=="/Users/schilder/Desktop/PD_scRNAseq"){
allGenes <- F
test.use <- "wilcox"
}else{
allGenes <- T
# MAST not install on Minerva...
test.use <- "wilcox"}
allGenes <- T
nCores <- parallel::detectCores()
set.seed(2019)Also subset to only protein-coding genes.
## Subsetting genes: protein_coding
proteins <- biotypes[biotypes$gene_biotype=="protein_coding",]$hgnc_symbol %>% droplevels()
protDAT <- subset_seurat(DAT, genes.use = proteins)
cds <- seurat_to_monocle(seurat_object = DAT, monocle_version = 3)## Object representation is consistent with the most current Seurat version.
## [1] "Processing..."
## [1] "+ Expression data"
## [1] "+ Phenotype data"
## [1] "+ Feature Data"
## [1] "Expression Data dims:"
## [1] 24914 19144
## [1] "Metadata dims:"
## [1] 19144 17
## [1] "Feature Data dims:"
## [1] 24914 1
## [1] "+ Converting to monocle (Version 3)"
cds <- monocle3::preprocess_cds(cds,
num_dim = 20, #100 by default
residual_model_formula_str = "~ nUMI + percent.mito")
monocle3::plot_pc_variance_explained(cds) Using UMAP.
# c("UMAP", "tSNE", "PCA", "LSI")
cds <- monocle3::reduce_dimension(cds, reduction_method = "UMAP",
max_components = 3
# umap.fast_sgd = T,
# cores=nCores
) ## No trajectory to plot. Has learn_graph() been called yet?
## No trajectory to plot. Has learn_graph() been called yet?
## No trajectory to plot. Has learn_graph() been called yet?
In addition to clusters this function calculates partitions, which represent superclusters of the Louvain communities that are found using a kNN pruning method. Cluster assignments can be accessed using the clusters function and partition assignments can be accessed using the partitions function.
Using only the topN variable genes to cluster
# Method 1
# variable.genes <- cds@preprocess_aux@listData$gene_loadings[,1:3] %>%
# abs() %>% rowSums() %>% base::sort(decreasing = T)
# head(variable.genes)
varDAT <- Seurat::FindVariableGenes(object = protDAT,
mean.function = ExpMean,
dispersion.function = LogVMR,
selection.method ="dispersion", do.plot = T,
top.genes = 2000)## Warning in KernSmooth::bkde2D(x, bandwidth = bandwidth, gridsize = nbin, :
## Binning grid too coarse for current (small) bandwidth: consider increasing
## 'gridsize'
## [1] "TMSB4X" "FTL" "S100A8" "FTH1" "B2M" "S100A9"
##
|
| | 0%
|
|=================================================================| 100%
## Warning in louvain_clustering(data, pd[row.names(data), ], k = k, weight = weight, : RANN counts the point itself, k must be smaller than
## the total number of points - 1 (all other points) - 1 (itself)!
# Order cells
root_pr_nodes = get_earliest_principal_node(cds, variable = "dx", variable_value = "PD")
cds <- monocle3::order_cells(cds, root_pr_nodes = root_pr_nodes)
monocle3::plot_cells(cds)## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## [1] "DGE: 100 genes x 19144 samples"
## [1] "+ Calculated in 0 hours."
## [1] "+ 62 significiant DGE(s) detected."
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
marker_test_res = monocle3::top_markers(cds[var.genes[1:100], ],
group_cells_by="cluster",
reference_cells=1000,
cores=nCores)##
| | 0%, ETA NA
|= | 1%, ETA 00:02
|= | 2%, ETA 00:01
|== | 3%, ETA 00:01
|== | 4%, ETA 00:01
|=== | 5%, ETA 00:01
|=== | 6%, ETA 00:00
|==== | 7%, ETA 00:00
|==== | 8%, ETA 00:00
|===== | 9%, ETA 00:00
|===== | 10%, ETA 00:00
|====== | 11%, ETA 00:00
|======= | 13%, ETA 00:00
|======== | 15%, ETA 00:00
|========= | 16%, ETA 00:00
|========= | 17%, ETA 00:00
|========== | 18%, ETA 00:00
|=========== | 20%, ETA 00:00
|============ | 23%, ETA 00:00
|============= | 24%, ETA 00:00
|============== | 26%, ETA 00:00
|=============== | 27%, ETA 00:00
|================ | 30%, ETA 00:00
|================= | 31%, ETA 00:00
|================= | 32%, ETA 00:00
|================== | 33%, ETA 00:00
|================== | 34%, ETA 00:00
|==================== | 37%, ETA 00:00
|===================== | 38%, ETA 00:00
|====================== | 40%, ETA 00:00
|=========================== | 50%, ETA 00:00
|============================ | 51%, ETA 00:00
|============================ | 52%, ETA 00:00
|============================= | 53%, ETA 00:00
|============================== | 56%, ETA 00:00
|=============================== | 58%, ETA 00:00
|================================== | 63%, ETA 00:00
|=================================== | 64%, ETA 00:00
|==================================== | 66%, ETA 00:00
|==================================== | 67%, ETA 00:00
|===================================== | 69%, ETA 00:00
|======================================== | 74%, ETA 00:00
|============================================ | 81%, ETA 00:00
|================================================= | 91%, ETA 00:00
|===================================================== | 98%, ETA 00:00
|==================================================| 100%, Elapsed 00:00
##
| | 0%, ETA NA
| | 1%, ETA 00:42
|= | 1%, ETA 00:21
|= | 2%, ETA 00:14
|= | 3%, ETA 00:11
|== | 3%, ETA 00:15
|== | 4%, ETA 00:16
|=== | 5%, ETA 00:15
|=== | 5%, ETA 00:13
|=== | 6%, ETA 00:16
|==== | 7%, ETA 00:14
|==== | 7%, ETA 00:13
|==== | 8%, ETA 00:12
|===== | 9%, ETA 00:19
|===== | 9%, ETA 00:18
|===== | 10%, ETA 00:16
|====== | 11%, ETA 00:15
|====== | 11%, ETA 00:17
|====== | 12%, ETA 00:16
|======= | 13%, ETA 00:15
|======= | 13%, ETA 00:14
|======== | 14%, ETA 00:15
|======== | 15%, ETA 00:15
|======== | 15%, ETA 00:14
|========= | 16%, ETA 00:13
|========= | 17%, ETA 00:14
|========= | 17%, ETA 00:13
|========== | 18%, ETA 00:13
|========== | 19%, ETA 00:13
|========== | 19%, ETA 00:12
|=========== | 20%, ETA 00:12
|=========== | 21%, ETA 00:12
|============ | 21%, ETA 00:11
|============ | 22%, ETA 00:11
|============ | 23%, ETA 00:11
|============= | 23%, ETA 00:11
|============= | 24%, ETA 00:10
|============= | 25%, ETA 00:10
|============== | 25%, ETA 00:10
|============== | 26%, ETA 00:10
|============== | 27%, ETA 00:09
|=============== | 27%, ETA 00:09
|=============== | 28%, ETA 00:09
|=============== | 29%, ETA 00:09
|================ | 29%, ETA 00:08
|================ | 30%, ETA 00:08
|================= | 31%, ETA 00:08
|================= | 31%, ETA 00:08
|================= | 32%, ETA 00:08
|================== | 33%, ETA 00:08
|================== | 33%, ETA 00:07
|================== | 34%, ETA 00:07
|=================== | 35%, ETA 00:07
|=================== | 35%, ETA 00:07
|=================== | 36%, ETA 00:07
|==================== | 37%, ETA 00:07
|==================== | 37%, ETA 00:07
|===================== | 38%, ETA 00:06
|===================== | 39%, ETA 00:06
|===================== | 39%, ETA 00:06
|====================== | 40%, ETA 00:06
|====================== | 41%, ETA 00:06
|====================== | 41%, ETA 00:06
|======================= | 42%, ETA 00:06
|======================= | 43%, ETA 00:06
|======================= | 43%, ETA 00:06
|======================== | 44%, ETA 00:05
|======================== | 45%, ETA 00:05
|======================== | 45%, ETA 00:05
|========================= | 46%, ETA 00:05
|========================= | 47%, ETA 00:05
|========================== | 47%, ETA 00:05
|========================== | 48%, ETA 00:05
|========================== | 49%, ETA 00:05
|=========================== | 49%, ETA 00:05
|=========================== | 50%, ETA 00:05
|=========================== | 51%, ETA 00:04
|============================ | 51%, ETA 00:04
|============================ | 52%, ETA 00:04
|============================ | 53%, ETA 00:04
|============================= | 53%, ETA 00:04
|============================= | 54%, ETA 00:04
|============================== | 55%, ETA 00:04
|============================== | 55%, ETA 00:04
|============================== | 56%, ETA 00:04
|=============================== | 57%, ETA 00:04
|=============================== | 57%, ETA 00:04
|=============================== | 58%, ETA 00:03
|================================ | 59%, ETA 00:03
|================================ | 59%, ETA 00:03
|================================ | 60%, ETA 00:03
|================================= | 61%, ETA 00:03
|================================= | 61%, ETA 00:03
|================================= | 62%, ETA 00:03
|================================== | 63%, ETA 00:03
|================================== | 63%, ETA 00:03
|=================================== | 64%, ETA 00:03
|=================================== | 65%, ETA 00:03
|=================================== | 65%, ETA 00:03
|==================================== | 66%, ETA 00:03
|==================================== | 67%, ETA 00:03
|==================================== | 67%, ETA 00:03
|===================================== | 68%, ETA 00:03
|===================================== | 69%, ETA 00:03
|===================================== | 69%, ETA 00:03
|====================================== | 70%, ETA 00:03
|====================================== | 71%, ETA 00:03
|======================================= | 71%, ETA 00:03
|======================================= | 72%, ETA 00:03
|======================================= | 73%, ETA 00:03
|======================================== | 73%, ETA 00:03
|======================================== | 74%, ETA 00:02
|======================================== | 75%, ETA 00:02
|========================================= | 75%, ETA 00:02
|========================================= | 76%, ETA 00:02
|========================================= | 77%, ETA 00:02
|========================================== | 77%, ETA 00:02
|========================================== | 78%, ETA 00:02
|========================================== | 79%, ETA 00:02
|=========================================== | 79%, ETA 00:02
|=========================================== | 80%, ETA 00:02
|============================================ | 81%, ETA 00:02
|============================================ | 81%, ETA 00:02
|============================================ | 82%, ETA 00:02
|============================================= | 83%, ETA 00:01
|============================================= | 83%, ETA 00:01
|============================================= | 84%, ETA 00:01
|============================================== | 85%, ETA 00:01
|============================================== | 85%, ETA 00:01
|============================================== | 86%, ETA 00:01
|=============================================== | 87%, ETA 00:01
|=============================================== | 87%, ETA 00:01
|================================================ | 88%, ETA 00:01
|================================================ | 89%, ETA 00:01
|================================================ | 89%, ETA 00:01
|================================================= | 90%, ETA 00:01
|================================================= | 91%, ETA 00:01
|================================================= | 91%, ETA 00:01
|================================================== | 92%, ETA 00:01
|================================================== | 93%, ETA 00:01
|================================================== | 93%, ETA 00:01
|=================================================== | 94%, ETA 00:00
|=================================================== | 95%, ETA 00:00
|=================================================== | 95%, ETA 00:00
|==================================================== | 96%, ETA 00:00
|==================================================== | 97%, ETA 00:00
|===================================================== | 97%, ETA 00:00
|===================================================== | 98%, ETA 00:00
|===================================================== | 99%, ETA 00:00
|======================================================| 99%, ETA 00:00
|==================================================| 100%, Elapsed 00:08
# Cluster-specific markers
top_specific_markers = marker_test_res %>%
filter(fraction_expressing >= 0.10) %>%
group_by(cell_group) %>%
top_n(4, pseudo_R2)
DT::datatable(top_specific_markers) ## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# monocle3::plot_genes_by_group(cds,
# markers=top_specific_marker_ids,
# group_cells_by="partition",
# ordering_type="maximal_on_diag",
# max.size=3)## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 26788 rows containing non-finite values (stat_ydensity).
## Warning: Removed 26788 rows containing non-finite values (stat_summary).
# UMAP
monocle3::plot_cells(cds,
genes=c("CD14","FCGR3A"),
group_cells_by = "cluster",
show_trajectory_graph = F)monocle3::plot_cells(cds,
genes=unique(top_specific_markers$gene_id),
group_cells_by = "cluster",
show_trajectory_graph = F)Use a pre-trained classifier from Pline et al.
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
## [5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
## [9] "EVIDENCEALL" "GENENAME" "GO" "GOALL"
## [13] "IPI" "MAP" "OMIM" "ONTOLOGY"
## [17] "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
## [25] "UNIGENE" "UNIPROT"
load(url("https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC"))
cds = garnett::classify_cells(cds,
hsPBMC,
db = org.Hs.eg.db,
cluster_extend = T,
cds_gene_id_type = "SYMBOL")## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
monocle3::plot_cells(cds,
group_cells_by="cluster",
color_cells_by="cluster_ext_type", # cell_type
show_trajectory_graph = F )Find genes of interest through graph-autocorrelation analysis.
Identify co-expression modules.
find_gene_modules essentially runs UMAP on the genes (as opposed to the cells) and then groups them into modules using Louvain community analysis.
gene_module_df = monocle3::find_gene_modules(cds[var.genes[1:1000], ],
resolution=1e-2,
cores=nCores)
# Create pheatmap table
cell_group_df = tibble::tibble(cell=row.names(colData(cds)),
cell_group=clusters(cds)[colnames(cds)])
agg_mat = aggregate_gene_expression(cds, gene_module_df, cell_group_df)
row.names(agg_mat) = stringr::str_c("Module ", row.names(agg_mat))
colnames(agg_mat) = stringr::str_c("Cluster ", colnames(agg_mat))
pheatmap::pheatmap(agg_mat, cluster_rows=TRUE, cluster_cols=TRUE,
scale="column", clustering_method="ward.D2",
fontsize=6)